/*

Main input:
	> tmp/panel_wahlen_covted_ltw18.dta [elections w/ addresses, harmonized boundaries to LTW 2018]
	> newdata/wahllokal_change.dta 		[changes in polling places (PP), used to define treatment]
	> tmp/voter_panel_pp_dist.dta 		[distances b/w old and new PP]
	
Output: newdata/estimation_prep_ltw18_voter.dta [final dataset at address level]

Main tasks:	
	> Merge relevant datasets
	> Gen different treatment variables (change in polling location, distance change)
	> Drop 2009 data (only used to define changes for 2013)
				
*/


//// Voter-ADDRESS LEVEL ////		

*** Load the address-level election panel (converted to 2018 boundaries)
	use "$tmp/panel_wahlen_covted_ltw18.dta", clear

* Remove all variables matching lat* and lon* (redundant from here on)
	drop lat* lon*

** MERGES
	* (1) Polling-place (PP) information, keyed by wl_id and wahl_id.
	*     Every row in memory must find a match (allowed results: using-only = 2,
	*     match = 3); using-only rows are discarded by keep(match).
	merge m:1 wl_id wahl_id using "$newdata/wahllokal_change", ///
		assert(using match) keep(match) nogenerate
	* All remaining rows must refer to a PP flagged as active; once that is
	* verified the flag is constant and can be removed.
	assert wl_active == 1
	drop wl_active

	* (2) Distances b/w old and new PP, keyed by voter_id and wahl_id.
	*     No using-only rows are tolerated (allowed results: master-only = 1, match = 3).
	merge 1:1 voter_id wahl_id using "$tmp/voter_panel_pp_dist", ///
		assert(master match) keepusing(wl_street_dist)
	* Rows without a distance record may only occur for wahl_id == 0
	assert wahl_id == 0 if _merge == 1
	drop _merge
	

** Gen Variables
	* changed_wl := 1 if the voter's PP (wl_id) differs from the PP in the voter's
	* previous observation (ordered by wahl_id) and wahl_id > 0; 0 otherwise.
	* NOTE(review): for a voter's first observation wl_id[_n-1] is empty/missing, so a
	* voter whose first record has wahl_id > 0 is coded as changed -- confirm that
	* every voter has a wahl_id == 0 record.
		bysort voter_id (wahl_id): generate changed_wl = (wl_id != wl_id[_n-1] & wahl_id > 0)
		label variable changed_wl "=1 if PP changed from last election"
		
		
	* GEN TREATMENT variables
	 * treat_simple := indicator if PP changed (copy of changed_wl)
		gen 	treat_simple = changed_wl
		lab var treat_simple "Treated: PP changed for any reason (= changed_wl)"
		
	 * del_street_dist := change in street_dist relative to the voter's previous
	 *   observation (ordered by wahl_id); forced to 0 when the PP did not change.
	 *   It stays missing for a changed PP when street_dist is missing in either
	 *   observation or when there is no previous observation.
		bys voter_id (wahl_id): gen del_street_dist = street_dist[_n] - street_dist[_n-1]
		replace del_street_dist = 0 if changed_wl == 0
		lab var del_street_dist "Change in street_dist (due to PP change)"
		
	
	* GEN: street_increased/ street_decreased := dummy whether distance change is positive (negative)
	*   Stata orders missing values above every number, so (del_street_dist>0) is
	*   TRUE when del_street_dist is missing. Without the -if !missing()- qualifier
	*   a treated voter with unknown distance change would be coded as
	*   street_increased = 1. Both dummies are therefore left missing whenever the
	*   distance change itself is unknown.
		gen		 	street_increased = (del_street_dist>0 & treat_simple==1) if !missing(del_street_dist) // distance increased
		label var 	street_increased "Dummy if street distance increased"
		gen 		street_decreased = (del_street_dist<0 & treat_simple==1) if !missing(del_street_dist) // distance decreased
		label var 	street_decreased "Dummy if street distance decreased"			
	
	 * Reason for change in polling location
	 
		* A PP change is classified as "recruitment" when wl_added == 1 (new PP
		* inactive before) or when the voter's previous observation has
		* wl_dropped_lag == 1 (old PP inactive now); every other PP change is
		* classified as "precinct reconfiguration/consolidation".
			// treat_consol := PP changed, new PP active before AND old PP still active
			by voter_id (wahl_id), sort: generate treat_consol = ///
				changed_wl == 1 & wl_added == 0 & wl_dropped_lag[_n-1] == 0
			label variable treat_consol "Treated: New PP active before and old PP still active"
			
			// treat_no_consol := PP changed, new PP previously inactive OR previous PP inactive now
			by voter_id (wahl_id), sort: generate treat_no_consol = ///
				changed_wl == 1 & (wl_added == 1 | wl_dropped_lag[_n-1] == 1)
			label variable treat_no_consol "Treated: New PP prev. inactive or previous PP now inactive"
	
			// Consistency check: the two reasons must add up to the overall treatment indicator
			assert treat_simple == treat_consol + treat_no_consol
			
			// How often did a PP reassignment coincide with a change in precinct size?
			* sb_size_change := 1 if the number of rows sharing the voter's sb_old and
			*   wahl_id differs from that number in the voter's previous observation;
			*   0 for a voter's first observation.
			* 	Note: precinct size can change without a PP reassignment, and a PP
			* 	reassignment does not imply a size change.
			capture drop tmp*
			// tmp_size = number of rows within each sb_old x wahl_id cell
			bysort sb_old wahl_id (voter_id): generate tmp_size = _N
			bysort voter_id (wahl_id): generate sb_size_change = (_n > 1 & tmp_size != tmp_size[_n-1])
			label variable sb_size_change "Precint SIZE changed"
			drop tmp*

			
		*>>>>>>> DROP 2009 data <<<<<<<<<*
		* Rows with wahl_id == 0 (2009) were only needed to define changes above
		keep if wahl_id != 0
		*>>>>>>> DROP 2009 data <<<<<<<<<*

** Finalize dataset

	// Replace the string polling place ID (wl_id) by its encoded numeric version.
	// The intermediate name tmp1 is kept because -encode- names the value label after it.
	encode wl_id, generate(tmp1)
	drop wl_id
	rename tmp1 wl_id

 * save: address-level panel, sorted by voter and election
	sort voter_id wahl_id
	compress
	save "$newdata/estimation_prep_ltw18_voter.dta", replace
